# Start from a clean slate: remove every object in the current environment,
# including hidden names that begin with a dot (all.names = TRUE).
# NOTE(review): when run at top level this wipes the caller's whole workspace
# — confirm that is intended before source()-ing this file.
rm(list = ls(all.names = TRUE))

Load libraries

library(readr)
library(ggplot2)
library(GGally)
library(speakr)
library(dplyr)
library(tidyverse)
library(readr)
library(plyr)
library(stringi)
library(purrr)
library(broom)
library(scatterplot3d)

library(tidyr)
library(readr)
library(readxl)

# Analysis of all_data ## Tidy all_data. The all_data data frame includes all 58 participants, split into two groups. Only the total number of correct responses is included for each experimental task (flanker, Stroop, backward).

# Load working data ----
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
all_data <- read.csv(
  file = "/Users/princesa/Desktop/diana/all_data.csv",
  header = TRUE
)

# Per-column summary statistics of the raw data
summary(all_data)
##  participant             age              IQ         backward_correct
##  Length:58          Min.   :22.00   Min.   : 60.00   Min.   :0.000   
##  Class :character   1st Qu.:32.00   1st Qu.: 96.25   1st Qu.:1.000   
##  Mode  :character   Median :38.00   Median :107.00   Median :4.000   
##                     Mean   :39.19   Mean   :108.09   Mean   :3.138   
##                     3rd Qu.:47.00   3rd Qu.:122.25   3rd Qu.:5.000   
##                     Max.   :59.00   Max.   :146.00   Max.   :6.000   
##  flanker_correct stroop_correct 
##  Min.   : 0.00   Min.   : 0.00  
##  1st Qu.:40.50   1st Qu.:38.00  
##  Median :45.00   Median :39.00  
##  Mean   :41.91   Mean   :36.83  
##  3rd Qu.:47.00   3rd Qu.:40.00  
##  Max.   :49.00   Max.   :40.00
# Separate groups: nonLanguage teachers and language teachers.
# NOTE(review): the split is positional (rows 1-36 vs. rows 37-58), so it
# presumably relies on the CSV being sorted by group — confirm.
nonLanguage <- all_data[1:36, ]
language <- all_data[37:58, ]

# Add a column indicating nonLanguage or language for each participant
nonLanguage$language <- "NonLanguage"
language$language <- "Language"

# Recombine both groups so that all_data carries the new `language` column
all_data <- bind_rows(nonLanguage, language)

##Data Visualization of all participants with ggplot

# Scatterplot of IQ against age for all participants
ggplot(data = all_data, mapping = aes(x = age, y = IQ)) +
  geom_point(size = 1) +
  ggtitle("Language Plot")

# Layers tried earlier and left disabled:
#   geom_jitter(width = .2)
#   geom_smooth(method = "") +
#   facet_grid(language~backward_correct)

Analysis by groups

Data Analysis for nonLanguage Group {.tabset} The nonLanguage group consists of the teachers who use only one language in their professional work setting.

Linear regression model for nonLanguage BACKWARD task

# `nonLanguage` is a data frame created earlier in this script, not a packaged
# data set, so data() had nothing to load and only raised a "data set not
# found" warning. Preview the first rows directly.
head(nonLanguage)
##   participant age  IQ backward_correct flanker_correct stroop_correct
## 1       adnm2  36 135                5              45             39
## 2       bjfc1  40 108                4              44             38
## 3       buzt0  30 146                5              49             39
## 4       bwat2  34 128                1              46             39
## 5       cbky9  49 135                4              48             40
## 6       cfug6  34  63                0              27              2
##      language
## 1 NonLanguage
## 2 NonLanguage
## 3 NonLanguage
## 4 NonLanguage
## 5 NonLanguage
## 6 NonLanguage
# Scatterplots of the backward-task score against the predictors
plot(backward_correct ~ age + IQ, data = nonLanguage, pch = 16)

# Multiple linear regression: backward score on age and IQ
backward_model_nonLanguage <- lm(
  formula = backward_correct ~ age + IQ,
  data = nonLanguage
)
# (no abline() here: the fit has two predictors, not a single slope)

# Coefficient table as a tibble, then shown
my_results <- broom::tidy(backward_model_nonLanguage)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept) -0.194      2.35     -0.0828  0.934 
## 2 age         -0.00622    0.0366   -0.170   0.866 
## 3 IQ           0.0316     0.0154    2.05    0.0482
backward_model_nonLanguage
## 
## Call:
## lm(formula = backward_correct ~ age + IQ, data = nonLanguage)
## 
## Coefficients:
## (Intercept)          age           IQ  
##   -0.194378    -0.006223     0.031591
summary(backward_model_nonLanguage)
## 
## Call:
## lm(formula = backward_correct ~ age + IQ, data = nonLanguage)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3658 -2.0974  0.6713  1.3846  2.6145 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.194378   2.346418  -0.083   0.9345  
## age         -0.006223   0.036562  -0.170   0.8659  
## IQ           0.031591   0.015400   2.051   0.0482 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.934 on 33 degrees of freedom
## Multiple R-squared:  0.1174, Adjusted R-squared:  0.06393 
## F-statistic: 2.195 on 2 and 33 DF,  p-value: 0.1273
# Draw the lm diagnostic plots on one 2 x 2 page. par() returns the previous
# settings; they are restored afterwards so the grid layout does not leak
# into plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(backward_model_nonLanguage)
par(old_par)

AIC(backward_model_nonLanguage)
## [1] 154.5309
BIC(backward_model_nonLanguage)
## [1] 160.865

Linear regression model for nonLanguage FLANKER task

# `nonLanguage` is a data frame created earlier in this script, not a packaged
# data set, so data() had nothing to load and only raised a "data set not
# found" warning. Preview the first rows directly.
head(nonLanguage)
##   participant age  IQ backward_correct flanker_correct stroop_correct
## 1       adnm2  36 135                5              45             39
## 2       bjfc1  40 108                4              44             38
## 3       buzt0  30 146                5              49             39
## 4       bwat2  34 128                1              46             39
## 5       cbky9  49 135                4              48             40
## 6       cfug6  34  63                0              27              2
##      language
## 1 NonLanguage
## 2 NonLanguage
## 3 NonLanguage
## 4 NonLanguage
## 5 NonLanguage
## 6 NonLanguage
# Scatterplots of the flanker-task score against the predictors
plot(flanker_correct ~ age + IQ, data = nonLanguage, pch = 16)

# Multiple linear regression: flanker score on age and IQ
flanker_model_nonLanguage <- lm(
  formula = flanker_correct ~ age + IQ,
  data = nonLanguage
)
# (no abline() here: the fit has two predictors, not a single slope)

# Coefficient table as a tibble, then shown
my_results <- broom::tidy(flanker_model_nonLanguage)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  28.1       6.91       4.07  0.000278
## 2 age           0.0766    0.108      0.711 0.482   
## 3 IQ            0.104     0.0454     2.30  0.0282
flanker_model_nonLanguage
## 
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = nonLanguage)
## 
## Coefficients:
## (Intercept)          age           IQ  
##    28.11861      0.07663      0.10418
summary(flanker_model_nonLanguage)
## 
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = nonLanguage)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.9680  -0.4283   1.4999   3.2503   8.2520 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.11861    6.91314   4.067 0.000278 ***
## age          0.07663    0.10772   0.711 0.481859    
## IQ           0.10418    0.04537   2.296 0.028156 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.699 on 33 degrees of freedom
## Multiple R-squared:  0.1419, Adjusted R-squared:  0.08992 
## F-statistic: 2.729 on 2 and 33 DF,  p-value: 0.08002
# Draw the lm diagnostic plots on one 2 x 2 page. par() returns the previous
# settings; they are restored afterwards so the grid layout does not leak
# into plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(flanker_model_nonLanguage)
par(old_par)

AIC(flanker_model_nonLanguage)
## [1] 232.3294
BIC(flanker_model_nonLanguage)
## [1] 238.6635

Linear regression model for nonLanguage STROOP task

# `nonLanguage` is a data frame created earlier in this script, not a packaged
# data set, so data() had nothing to load and only raised a "data set not
# found" warning. Preview the first rows directly.
head(nonLanguage)
##   participant age  IQ backward_correct flanker_correct stroop_correct
## 1       adnm2  36 135                5              45             39
## 2       bjfc1  40 108                4              44             38
## 3       buzt0  30 146                5              49             39
## 4       bwat2  34 128                1              46             39
## 5       cbky9  49 135                4              48             40
## 6       cfug6  34  63                0              27              2
##      language
## 1 NonLanguage
## 2 NonLanguage
## 3 NonLanguage
## 4 NonLanguage
## 5 NonLanguage
## 6 NonLanguage
# Scatterplots of the Stroop-task score against the predictors
plot(stroop_correct ~ age + IQ, data = nonLanguage, pch = 16)

# Multiple linear regression: Stroop score on age and IQ
stroop_model_nonLanguage <- lm(stroop_correct ~ age + IQ, data = nonLanguage)
# abline() is deliberately not called: the model has three coefficients, and
# abline() would use only the first two (intercept and age slope), drawing a
# line that does not represent the fit on the scatterplot currently shown.

# Coefficient table as a tibble, then shown
my_results <- tidy(stroop_model_nonLanguage)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)  19.4       6.94       2.79  0.00863
## 2 age           0.0714    0.108      0.660 0.514  
## 3 IQ            0.138     0.0456     3.03  0.00474
stroop_model_nonLanguage
## 
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = nonLanguage)
## 
## Coefficients:
## (Intercept)          age           IQ  
##    19.38713      0.07142      0.13802
summary(stroop_model_nonLanguage)
## 
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = nonLanguage)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5108  -1.5472   0.5263   2.1878   7.3319 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 19.38713    6.94219   2.793  0.00863 **
## age          0.07142    0.10817   0.660  0.51370   
## IQ           0.13802    0.04556   3.029  0.00474 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.723 on 33 degrees of freedom
## Multiple R-squared:  0.2191, Adjusted R-squared:  0.1718 
## F-statistic:  4.63 on 2 and 33 DF,  p-value: 0.01689
# Draw the lm diagnostic plots on one 2 x 2 page. par() returns the previous
# settings; they are restored afterwards so the grid layout does not leak
# into plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(stroop_model_nonLanguage)
par(old_par)

AIC(stroop_model_nonLanguage)
## [1] 232.6312
BIC(stroop_model_nonLanguage)
## [1] 238.9653

Data Analysis for Language Group

The language group consists of the teachers who use more than one language in their professional work setting.

Linear regression model for language BACKWARD

# `language` is a data frame created earlier in this script, not a packaged
# data set, so data() had nothing to load and only raised a "data set not
# found" warning. Preview the first rows directly.
head(language)
##    participant age  IQ backward_correct flanker_correct stroop_correct language
## 37    bi_ajli0  35  94                4              45             38 Language
## 38    bi_dckv1  30 105                4              49             38 Language
## 39    bi_dvyf7  58 105                5              37             39 Language
## 40    bi_evur7  42  93                0              40             34 Language
## 41    bi_giah0  23 102                5              44             39 Language
## 42    bi_igsy3  50 120                6              49             39 Language
# Scatterplots of the backward-task score against the predictors (language group)
plot(backward_correct ~ age + IQ, data = language, pch = 16)

# Multiple linear regression: backward score on age and IQ
backward_model_language <- lm(backward_correct ~ age + IQ, data = language)
# abline() is deliberately not called: the model has three coefficients, and
# abline() would use only the first two (intercept and age slope), drawing a
# line that does not represent the fit on the scatterplot currently shown.

# Coefficient table as a tibble, then shown
my_results <- tidy(backward_model_language)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic   p.value
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)  -7.69      2.80       -2.75 0.0128   
## 2 age          -0.0609    0.0269     -2.27 0.0352   
## 3 IQ            0.129     0.0258      5.00 0.0000795
backward_model_language
## 
## Call:
## lm(formula = backward_correct ~ age + IQ, data = language)
## 
## Coefficients:
## (Intercept)          age           IQ  
##    -7.69311     -0.06092      0.12908
summary(backward_model_language)
## 
## Call:
## lm(formula = backward_correct ~ age + IQ, data = language)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.32845 -0.81186 -0.07596  0.90055  2.67241 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.69311    2.79829  -2.749   0.0128 *  
## age         -0.06092    0.02687  -2.267   0.0352 *  
## IQ           0.12908    0.02582   5.000 7.95e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.375 on 19 degrees of freedom
## Multiple R-squared:  0.5937, Adjusted R-squared:  0.5509 
## F-statistic: 13.88 on 2 and 19 DF,  p-value: 0.0001923
# Draw the lm diagnostic plots on one 2 x 2 page. par() returns the previous
# settings; they are restored afterwards so the grid layout does not leak
# into plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(backward_model_language)
par(old_par)

AIC(backward_model_language)
## [1] 81.20737
BIC(backward_model_language)
## [1] 85.57154

Linear regression model for language FLANKER

# `language` is a data frame created earlier in this script, not a packaged
# data set, so data() had nothing to load and only raised a "data set not
# found" warning. Preview the first rows directly.
head(language)
##    participant age  IQ backward_correct flanker_correct stroop_correct language
## 37    bi_ajli0  35  94                4              45             38 Language
## 38    bi_dckv1  30 105                4              49             38 Language
## 39    bi_dvyf7  58 105                5              37             39 Language
## 40    bi_evur7  42  93                0              40             34 Language
## 41    bi_giah0  23 102                5              44             39 Language
## 42    bi_igsy3  50 120                6              49             39 Language
# Scatterplots of the flanker-task score against the predictors (language group)
plot(flanker_correct ~ age + IQ, data = language, pch = 16)

# Multiple linear regression: flanker score on age and IQ
flanker_model_language <- lm(flanker_correct ~ age + IQ, data = language)
# abline() is deliberately not called: the model has three coefficients, and
# abline() would use only the first two (intercept and age slope), drawing a
# line that does not represent the fit on the scatterplot currently shown.

# Coefficient table as a tibble, then shown
my_results <- tidy(flanker_model_language)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   12.1      20.1       0.605  0.552 
## 2 age           -0.425     0.193    -2.21   0.0399
## 3 IQ             0.443     0.185     2.39   0.0271
flanker_model_language
## 
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = language)
## 
## Coefficients:
## (Intercept)          age           IQ  
##     12.1411      -0.4251       0.4432
summary(flanker_model_language)
## 
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = language)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.278  -1.917   2.854   4.847  11.242 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  12.1411    20.0687   0.605   0.5523  
## age          -0.4251     0.1927  -2.207   0.0399 *
## IQ            0.4432     0.1851   2.394   0.0271 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.858 on 19 degrees of freedom
## Multiple R-squared:  0.3319, Adjusted R-squared:  0.2616 
## F-statistic: 4.719 on 2 and 19 DF,  p-value: 0.02168
# Draw the lm diagnostic plots on one 2 x 2 page. par() returns the previous
# settings; they are restored afterwards so the grid layout does not leak
# into plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(flanker_model_language)
par(old_par)

AIC(flanker_model_language)
## [1] 167.8941
BIC(flanker_model_language)
## [1] 172.2582

Linear regression model for language STROOP

# `language` is a data frame created earlier in this script, not a packaged
# data set, so data() had nothing to load and only raised a "data set not
# found" warning. Preview the first rows directly.
head(language)
##    participant age  IQ backward_correct flanker_correct stroop_correct language
## 37    bi_ajli0  35  94                4              45             38 Language
## 38    bi_dckv1  30 105                4              49             38 Language
## 39    bi_dvyf7  58 105                5              37             39 Language
## 40    bi_evur7  42  93                0              40             34 Language
## 41    bi_giah0  23 102                5              44             39 Language
## 42    bi_igsy3  50 120                6              49             39 Language
# Scatterplots of the Stroop-task score against the predictors (language
# group), laid out on a 2 x 2 page. The previous par() settings are restored
# afterwards so the grid does not leak into later plots.
old_par <- par(mfrow = c(2, 2))
plot(stroop_correct ~ age + IQ, data = language, pch = 16)
par(old_par)

# Multiple linear regression: Stroop score on age and IQ
stroop_model_language <- lm(stroop_correct ~ age + IQ, data = language)

# The diagnostic plots for this model are drawn once, after its summary
# further down, as in the other task sections; the extra copy that used to be
# drawn here was removed.

# Coefficient table as a tibble, then shown
my_results <- tidy(stroop_model_language)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   13.6      14.9       0.914  0.372 
## 2 age           -0.313     0.143    -2.18   0.0417
## 3 IQ             0.338     0.138     2.45   0.0240
stroop_model_language
## 
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = language)
## 
## Coefficients:
## (Intercept)          age           IQ  
##     13.6334      -0.3127       0.3376
summary(stroop_model_language)
## 
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = language)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.5774  -1.6728   0.3679   2.9557  11.9590 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  13.6334    14.9160   0.914   0.3722  
## age          -0.3127     0.1432  -2.184   0.0417 *
## IQ            0.3376     0.1376   2.453   0.0240 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.327 on 19 degrees of freedom
## Multiple R-squared:  0.3359, Adjusted R-squared:  0.266 
## F-statistic: 4.805 on 2 and 19 DF,  p-value: 0.02048
# Draw the lm diagnostic plots on one 2 x 2 page. par() returns the previous
# settings; they are restored afterwards so the grid layout does not leak
# into plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(stroop_model_language)
par(old_par)

AIC(stroop_model_language)
## [1] 154.8383
BIC(stroop_model_language)
## [1] 159.2025

#ADDITIONAL ANALYSIS

Histogram to observe results

# Histograms of the three task scores over all participants.
# The columns are looked up inside all_data with with() instead of attach():
# attach() put the columns on the search path, where `language` was masked by
# the `language` data frame in the global environment. with() keeps the plain
# column name in each histogram title.
with(all_data, hist(backward_correct)) # this could be considered "normal"

with(all_data, hist(flanker_correct)) # not normal

with(all_data, hist(stroop_correct)) # not normal

summary(all_data)
##  participant             age              IQ         backward_correct
##  Length:58          Min.   :22.00   Min.   : 60.00   Min.   :0.000   
##  Class :character   1st Qu.:32.00   1st Qu.: 96.25   1st Qu.:1.000   
##  Mode  :character   Median :38.00   Median :107.00   Median :4.000   
##                     Mean   :39.19   Mean   :108.09   Mean   :3.138   
##                     3rd Qu.:47.00   3rd Qu.:122.25   3rd Qu.:5.000   
##                     Max.   :59.00   Max.   :146.00   Max.   :6.000   
##  flanker_correct stroop_correct    language        
##  Min.   : 0.00   Min.   : 0.00   Length:58         
##  1st Qu.:40.50   1st Qu.:38.00   Class :character  
##  Median :45.00   Median :39.00   Mode  :character  
##  Mean   :41.91   Mean   :36.83                     
##  3rd Qu.:47.00   3rd Qu.:40.00                     
##  Max.   :49.00   Max.   :40.00

normalize data

Attempted to normalize data to see if it would make a difference.

# Standardise each task score with scale(). Columns are taken explicitly from
# all_data rather than through a second attach(), which stacked another copy
# of the data frame on the search path and produced masking messages.
scaledflank <- scale(all_data$flanker_correct)
scaledstroop <- scale(all_data$stroop_correct)
scaledback <- scale(all_data$backward_correct)

# Participants whose ID starts with "bi_" are flagged "true", all others
# "false".
lang <- startsWith(all_data$participant, "bi_")
biling <- ifelse(lang, "true", "false")

# Collect predictors, scaled scores and the flag in one data frame
data <- data.frame(
  age = all_data$age,
  IQ = all_data$IQ,
  scaledflank, scaledstroop, scaledback, biling
)
#colnames(data)=c("age","IQ","scaledflank","scaledstroop","scaledback","biling") #not needed but may use later

hist(scaledback)

hist(scaledstroop)

hist(scaledflank)

#——————————————## TESTING Additional plots

# IQ against backward-task score for all participants, points coloured by age
ggplot(all_data, aes(x = backward_correct, y = IQ, colour = age)) +
  geom_point()

# 3D scatter of age, IQ and backward-task score.
# The three numeric columns are selected by name. The previous selection,
# all_data[1:4], started with the character `participant` column, so the
# participant ID was used as the first coordinate and backward_correct was
# not plotted as an axis.
scatterplot3d(
  all_data[, c("age", "IQ", "backward_correct")],
  pch = 19, color = "steelblue",
  grid = TRUE, box = FALSE,
  mar = c(3, 3, 0.5, 3)
)

# nonLanguage group: IQ against backward-task score, drawn with large filled
# circles (shape 21 takes both an outline colour and a fill).
# NOTE(review): the title reads "LanguagePlot" although the data are the
# nonLanguage group — confirm the intended title.
ggplot(data = nonLanguage, mapping = aes(x = backward_correct, y = IQ)) +
  geom_point(shape = 21, size = 5, colour = "black", fill = "dark green") +
  ggtitle("LanguagePlot")

#————————————————————————#

Removing one observation (row 45), which may be an outlier. This was done only to see whether it would change the results.

##Removing line 45

# New data set without row 45 (the suspected outlier)
new_all_data <- all_data[-45, ]

# Language group without the outlier. Dropping one row leaves one row fewer,
# so the group now ends at the last remaining row. The old index 37:58 asked
# for a row that no longer exists and appended an all-NA record, which lm()
# then reported as "1 observation deleted due to missingness".
new_language <- new_all_data[37:nrow(new_all_data), ]


# Scatterplots of the flanker-task score against the predictors, language
# group with the suspected outlier removed
plot(flanker_correct ~ age + IQ, data = new_language, pch = 16)

# Multiple linear regression on the reduced language group.
# NOTE(review): this reuses the name `flanker_model_language`, replacing the
# model fitted on the full language group earlier in the script.
flanker_model_language <- lm(flanker_correct ~ age + IQ, data = new_language)
# abline() is deliberately not called: the model has three coefficients, and
# abline() would use only the first two (intercept and age slope), drawing a
# line that does not represent the fit on the scatterplot currently shown.

# Coefficient table as a tibble, then shown
my_results <- tidy(flanker_model_language)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   21.2      13.8        1.54  0.141 
## 2 age           -0.155     0.142     -1.09  0.290 
## 3 IQ             0.266     0.131      2.03  0.0573
flanker_model_language
## 
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = new_language)
## 
## Coefficients:
## (Intercept)          age           IQ  
##     21.2068      -0.1552       0.2660
summary(flanker_model_language)
## 
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = new_language)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.7273  -0.7667   2.4163   3.6353   6.0239 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  21.2068    13.7539   1.542   0.1405  
## age          -0.1552     0.1423  -1.090   0.2899  
## IQ            0.2660     0.1310   2.031   0.0573 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.693 on 18 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2021, Adjusted R-squared:  0.1135 
## F-statistic:  2.28 on 2 and 18 DF,  p-value: 0.131
# Draw the lm diagnostic plots on one 2 x 2 page. par() returns the previous
# settings; they are restored afterwards so the grid layout does not leak
# into plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(flanker_model_language)
par(old_par)

##
# Scatterplots of the Stroop-task score against the predictors, language group
# with the suspected outlier removed, laid out on a 2 x 2 page. The settings
# in force before this block are kept in old_par and restored at the end.
old_par <- par(mfrow = c(2, 2))
plot(stroop_correct ~ age + IQ, data = new_language, pch = 16)

# Multiple linear regression on the reduced language group.
# NOTE(review): this reuses the name `stroop_model_language`, replacing the
# model fitted on the full language group earlier in the script.
stroop_model_language <- lm(stroop_correct ~ age + IQ, data = new_language)

# Start a fresh 2 x 2 page for the lm diagnostic plots
par(mfrow = c(2, 2))
plot(stroop_model_language)

# Put the plotting layout back so it does not leak into later plots
par(old_par)

# Coefficient table as a tibble, then shown
my_results <- tidy(stroop_model_language)
my_results
## # A tibble: 3 × 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  22.2       4.87        4.55 0.000250
## 2 age          -0.0590    0.0504     -1.17 0.257   
## 3 IQ            0.171     0.0464      3.69 0.00169
stroop_model_language
## 
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = new_language)
## 
## Coefficients:
## (Intercept)          age           IQ  
##    22.15387     -0.05899      0.17098
summary(stroop_model_language)
## 
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = new_language)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4736 -1.1456 -0.2982  1.4977  4.4921 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 22.15387    4.87196   4.547  0.00025 ***
## age         -0.05899    0.05041  -1.170  0.25715    
## IQ           0.17098    0.04639   3.686  0.00169 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.371 on 18 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4325, Adjusted R-squared:  0.3695 
## F-statistic:  6.86 on 2 and 18 DF,  p-value: 0.006101